% scribe: Daniel Metzger
% lastupdate: Oct. 2, 2005
% lecture: 3
% title: Expected value
% references: Durrett, section 1.3
% keywords: expectation, expected value, function spaces, Banach spaces, Hilbert spaces, Lp spaces, Fatou's Lemma, Dominated Convergence Theorem, Monotone Convergence Theorem, Jensen's inequality, Markov's inequality, Chebyshev's inequality, Holder's inequality, Cauchy-Schwarz inequality, Minkowski's inequality, triangle inequality, change of variable
% end
\documentclass[12pt, letterpaper]{article}
\include{macros}
\newtheorem{remark}[theorem]{Remark}
\begin{document}
\lecture{3}{Expected Value}{Daniel Metzger}{metzger@econ.berkeley.edu}
References: Durrett [Section 1.3]
\section{Expected Value}
% keywords: expectation, expected value
% end
Denote by $(\Omega,\F,\P)$ a probability space.
\begin{definition}
Let $X: \Omega \to \R$ be a random variable which is measurable with respect to $\F$ and $\borel$. The \emph{expected value} of $X$ is defined by
\begin{equation} \label{eq:ev}
\E(X):= \int_{\Omega} Xd\P = \int_{\Omega} X(\omega)\P(d\omega)
\end{equation}
The integral is defined as in Lebesgue integration, whenever $\int_{\Omega}|X|d\P< \infty$.
\end{definition}
\begin{theorem} [Existence of the integral for nonnegative e.r.r.v.]
Let $(\Omega, \F, \P)$ be a probability space. There is a unique functional $\E:X\mapsto \E(X) \in [0,\infty]$ such that
\begin{eqnarray}
\E(\1_A) &=& \P(A), \quad \forall \ A\in \F \label{eq:prob}\\
\E(cX)&=& c \E(X), \quad \forall\ c \geq 0, X \geq 0 \label{eq:lin1}\\
\E(X+Y)&=& \E(X)+\E(Y), \quad \forall\ X,Y \geq 0 \label{eq:lin2}\\
X \leq Y &\Rightarrow& \E(X)\leq \E(Y) \label{eq:monoton} \\
X_n \uparrow X &\Rightarrow& \E(X_n) \uparrow \E(X) \label{eq:cont}
\end{eqnarray}
\end{theorem}
\begin{proofsketch}
From these desired properties, we see immediately how to define $\E(X)$. The procedure is well known from Lebesgue integration.
First extend $\E$ from indicators to simple r.v.'s by linearity, then to positive r.v.'s by continuity from below, and finally check that everything is consistent.
\begin{description}
\item{Step 1: Simple random variables} Check that if $X=\sum_{i=1}^n c_i \1_{A_i}$ is a simple random variable, then
\begin{equation}
\E(X) = \sum_{i=1}^n c_i \P(A_i)
\end{equation}
works. Verify that $\E$ is well defined, etc.
\item{Step 2: Nonnegative random variables} Now use (\ref{eq:cont}) to extend $\E$ for general $X\geq 0$. We know that there exists an increasing sequence $X_n$ of simple r.v. with $X_n \uparrow X$. Now see that $\E(X_n)\uparrow$ (by monotonicity of $\E$). Define
\begin{equation}
\E(X) = \lim_{n \rightarrow \infty}\E(X_n)
\end{equation}
Verify again that $\E(X)$ is well defined.
\begin{remark}
Note that $\E(X)=+\infty$ is possible even if $\P(X<\infty)=1$. As an example look at $G$ which is a geometric r.v., i.e. $\P(G=g)=2^{-g}, \forall \ g=1,2,3,\dots$ Note that\ $\P(G<\infty)=1$, but $\E(2^G)=\sum_{g=1}^{\infty}2^g2^{-g}=\infty$.
\end{remark}
\item{Step 3: Signed random variables} Write $X$ as $X = X^+-X^-$, where $X^+:=\max(X,0)$ and $X^-:=-\min(X,0)$. Define $\E(X)$ as follows
\begin{equation}
\E(X)=\E(X^+)-\E(X^-)
\end{equation}
provided this expression is not $\infty-\infty$. Such $X$ are \emph{quasi-integrable}. $X$ is \emph{integrable} if $\E(|X|)<\infty$.
\end{description}
\end{proofsketch}
\section{Function spaces}
% keywords: function spaces, Banach spaces, Hilbert spaces, Lp spaces
% end
\begin{description}
\item{Banach spaces:} Let $X$ be a normed space with norm $||\cdot||_X$. If $X$ is complete with respect to the induced metric $d(x,y) := ||x-y||_X$, it is called a \emph{Banach space}.
\item{Hilbert spaces:} Let $K$ be a vector space with an inner product $(\cdot,\cdot)_K$. If $K$ is complete with respect to the induced metric $d(x,y) := \sqrt{(x-y,x-y)_K}$, it is called a \emph{Hilbert space}.
\item{$L^p(\mu,Y)$ spaces:} Let $(S, \mathcal{S}, \mu)$ be a measure space and $Y$ a Banach space.
\begin{equation*}
L^p(\mu,Y) := \{f:S\to Y\mbox{: }f \mu\text{-measurable}, \int |f|^p < \infty \}
\end{equation*}
defines the $L^p$ spaces with an equivalence relation $f=g \text{ in}\ L^p :\Longleftrightarrow f=g\ \mu\mbox{-almost everywhere}$ (it can be shown by Chebyshev's inequality that $||X||_p = 0 \Longleftrightarrow \P(X=0)=1$).\\
\\
Let $||\cdot||_p := (\int |\cdot|^p)^{1/p}$ be the \emph{$L^p$ norm} of $S$. Define convergence in $L^p$ as follows:
\begin{equation}
X_n \stackrel{L^p}{\rightarrow} X \Longleftrightarrow ||X_n - X||_p \rightarrow 0
\end{equation}
It can be shown that $L^p$ is complete, i.e. if $\lim_{n,m \rightarrow \infty} ||X_n - X_m||_p=0, \exists \text{ a r.v.}\ X \text{ s.t.}\ X_n \stackrel{L^p}{\rightarrow} X$. Therefore $L^p$ spaces are Banach spaces for $p \geq 1$.\\
\\
$L^2$ is even a Hilbert space with the inner product $(f,g) = \int fg\ d\mu$.
\end{description}
For $p=1$ $L^p$ coincides with the space of integrable functions.
\begin{equation}
L^1(\Omega, \F,\P) := \{X : X \text{ is r.v. with}\ \E(|X|)< \infty\}
\end{equation}
\section{Integration to the limit}
% keywords: Fatou's Lemma, Dominated Convergence Theorem, Monotone Convergence Theorem
% end
\begin{theorem}[Fatou's Lemma]
If $X_n \geq 0$ then $\liminf_{n \rightarrow \infty} \E(X_n) \geq \E(\liminf_{n \rightarrow \infty}X_n)$.
\end{theorem}
\begin{example} \label{ex:fatou}
Define $X_n$ on $[0,1]$ as $X_n = n \1_{(0,1/n)}$.
\begin{equation}
\lim_{n \rightarrow \infty} \E(X_n) = \lim_{n \rightarrow \infty} 1 = 1 \geq 0 = \E(0) = \E\left(\lim_{n \rightarrow \infty} X_n\right)
\end{equation}
\end{example}
\begin{theorem}[Monotone Convergence Theorem]
If $0\leq X_n \uparrow X$ then $\E(X_n) \uparrow \E(X)$.
\end{theorem}
\begin{theorem}[Dominated Convergence Theorem]
If $X_n \rightarrow X$ a.s., $|X_n|\leq Y$ for all $n$, and $\E(Y) < \infty$, then $\E(X_n) \rightarrow \E(X)$.
\end{theorem} \begin{remark} Fatou's Lemma is usually applied when $\lim_n X_n$ exists, so the $\liminf$ on the left is a $\lim$. Remember example \ref{ex:fatou} to get the inequality the right way. \end{remark} \begin{remark} The simplest bound $Y$ in the dominated convergence theorem is a constant. (This works because we are in a finite measure space!) \end{remark} \section{Inequalities} % keywords: Jensen's inequality, Markov's inequality, Chebyshev's inequality, Holder's inequality, Cauchy-Schwarz inequality, Minkowski's inequality, triangle inequality % end Let $X,Y$ etc. be real r.v.'s defined on $(\Omega, \F, \P)$. \begin{theorem}[Jensen's Inequality] Let $\varphi$ be convex, $\E(|X|)<\infty$, $\E(|\varphi(X)|)<\infty$. Then \begin{equation} \label{eq:jenson} \varphi(\E(X)) \leq \E(\varphi(X)) \end{equation} \end{theorem} \begin{proofsketch} As $\varphi$ is convex, $\varphi$ is the supremum of a countable collection of lines. \begin{eqnarray*} \varphi(x) &=& \sup_n L_n(x), \quad L_n(x) = a_nx+b_n \notag \\ L_n(\E X) &\stackrel{(1)}{=}& \E(L_n(X)) \\ &\stackrel{(2)}{\leq}& \E(\varphi(X)) \end{eqnarray*} Take sup on $n$.\\ (1) used linearity, (2) used monotonicity. \end{proofsketch} \begin{example} \begin{eqnarray} && ||X||_p \uparrow \text{as}\ p \uparrow\\ && |\E(X)|\leq \E(|X|) \end{eqnarray} \end{example} \begin{theorem}[Markov's Inequality] If $X \geq 0, \ a>0$, then \begin{equation} \label{eq:markov} \P(X\geq a) \leq \E(X)/a \end{equation} \end{theorem} \begin{proof} Integrate $\1_{X\geq a} \leq X/a$. The stated result follows by monotonicity and linearity. \end{proof} \begin{theorem}[Chebyshev's Inequality] Let $\psi:\R_+ \to \R_+$ be increasing. Then \begin{equation} \label{eq:chebyshev} \P(|Y| > b) \leq \E(\psi(|Y|))/\psi(b) \end{equation} \end{theorem} \begin{proof} $$\P(|Y|>b)\stackrel{(1)}{=}\P(\psi(|Y|)>\psi(b))\stackrel{(2)}{\leq}\E(\psi(|Y|))/\psi(b)$$ (1) used that $\psi$ is increasing, and (2) used Markov's inequality. 
\end{proof}
\begin{example}
Note important examples $\psi(x)=x^p, \exp(x)$, etc.
\begin{eqnarray}
\psi(x)=x^2 &\Longrightarrow& \P(|Y|>b) \leq \E(Y^2)/b^2 \\
X = Y-\E(Y) &\Longrightarrow& \P(|Y-\E(Y)| > b) \leq \E\left((Y-\E(Y))^2\right)/b^2
\end{eqnarray}
\end{example}
\begin{theorem}[H\"older's Inequality]
If $p,q \in [1,\infty]$ with $1/p+1/q=1$ then
\begin{equation}
\E(|XY|)\leq ||X||_p||Y||_q
\end{equation}
Here $||X||_r = (\E(|X|^r))^{1/r}$ for $r \in [1,\infty)$; and $||X||_{\infty} = \inf\{M:\P(|X|>M)=0\}$.
\end{theorem}
\begin{proof}
See the proof of (5.2) in the Appendix of Durrett.
\end{proof}
\begin{example}
If $|Y|\leq b$ then $$\E(|XY|)\leq b\E(|X|)$$
\end{example}
\begin{theorem}[Cauchy-Schwarz Inequality]
The special case $p=q=2$ is the Cauchy-Schwarz inequality.
\begin{equation}
\E(|XY|)\leq (\E(X^2)\E(Y^2))^{1/2}
\end{equation}
\end{theorem}
\begin{proof}
Apply H\"older's inequality for $p=q=2$.
\end{proof}
\begin{theorem}[Minkowski's Inequality (Triangle inequality for $L^p$)]
$$||X+Y||_p \leq ||X||_p + ||Y||_p$$
\end{theorem}
% \begin{proofsketch}
%
% \end{proofsketch}
\section{Change of variable in integration}
% keywords: change of variable
% end
%\begin{eqnarray}
% \begin{array}{ccccc}
%(\Omega,\F,\P) & & y \circ x& & (T,\mathcal{T}, \P_{y \circ x})\\
% &&\longrightarrow&&\\
% x & \searrow & & \nearrow & y \\
% \\
% & & (S,\mathcal{S},\P_x) & &
%\end{array}
%\end{eqnarray}
Let $(\Omega, \F,\P)$ be a probability space and $X:\Omega \to S$ a $(\F/\mathcal{S})$-measurable random variable. $X$ induces a new probability measure $\P_X$ on $(S,\mathcal{S})$.
\begin{definition}
$\P_X(A) = \P(X \in A) = \P(X^{-1}(A))$ is called the $\P$ law of $X$ or the $\P$ distribution of $X$.
\end{definition}
Let $(T,\mathcal{T})$ be another measurable space and $Y: S \to T$ a measurable map. Then we have transitivity of the image laws.
\begin{figure}
\setlength{\unitlength}{1cm}
\begin{picture}(8,6)
\put(3,4.1){\vector(1,0){3}}
\put(4.7,1.5){\vector(1,1){2}}
\put(2.3,3.5){\vector(1,-1){2}}
\put(1.3,4){$(\Omega, \F, \P)$}
\put(6.3,4){$(T, \mathcal{T}, \P_{Y \circ X})$}
\put(3.8,1.0){$(S,\mathcal{S},\P_X)$}
\put(2.3,2.2){$X$}
\put(6.5,2.2){$Y$}
\put(4.1,4.3){$Y\circ X$}
\end{picture}
\caption{An illustration of the transitivity of the image laws.}
\end{figure}
\begin{theorem}[Transitivity of the image laws]
The $\P$ distribution of $Y \circ X$ is equal to the $\P_X$ distribution of $Y$.
\end{theorem}
\begin{theorem}[Change of variable formula]
Let $Y$ be a real-valued r.v.\ on $(S,\mathcal{S})$. $Y$ is $\P_X$-integrable iff $Y\circ X$ is $\P$-integrable, and then
$$\int_S Y d\P_X = \int_{\Omega} (Y\circ X) d\P$$
\end{theorem}
\begin{proof}
Fix $X$ and vary $Y$. For indicators $Y$ the identity is the transitivity of image laws, and this passes to simple r.v.'s $Y$, then all r.v.'s $Y$. See Durrett [1.3, p.~17]
\end{proof}
\end{document}